Pandas DataFrame


In [3]:
# Load the cleaned dataset from the CSV next to the notebook.
# DataFrame.from_csv was deprecated in pandas 0.21 and removed in 1.0;
# read_csv with index_col=0 and parse_dates=True reproduces its defaults.
lc_data = pd.read_csv(
    './lc_dataframe(cleaning).csv',
    index_col=0,
    parse_dates=True,
)
# Move the first CSV column back out of the index so rows are numbered 0..n-1.
lc_data = lc_data.reset_index()
# Show the last five rows as a quick sanity check of the load.
lc_data.tail()


Out[3]:
loan_amnt term int_rate installment grade sub_grade emp_title emp_length home_ownership annual_inc ... dti delinq_2yrs inq_last_6mths open_acc pub_rec revol_bal revol_util total_acc initial_list_status acc_now_delinq
268131 31050 60 21.99 857.40 6 61 1 10 1 875000.0 ... 9.66 1 0 10 0 25770 79.3 13 0 0
268132 10800 36 7.89 337.89 1 15 1 8 1 92400.0 ... 19.62 1 0 11 0 9760 68.7 36 1 0
268133 9000 36 9.17 286.92 2 22 1 1 1 80000.0 ... 3.97 1 0 8 0 6320 51.8 17 0 0
268134 14400 60 25.99 431.06 6 65 0 11 5 62000.0 ... 16.88 0 1 9 1 5677 45.1 30 0 0
268135 8000 36 12.59 267.98 3 32 1 4 4 45000.0 ... 26.21 0 0 12 0 9097 50.8 47 1 0

5 rows × 25 columns

V1 Term (연속형 자료형)


In [25]:
# V1: histogram of the 'term' column, split into 100 bins and drawn in red.
x = lc_data.loc[:, 'term']
# plt.hist returns the per-bin counts, the bin edges and the bar artists.
n, bins, patches = plt.hist(x, bins=100, color='r')
plt.gca().set(
    xlabel='month',
    ylabel='counts',
    title='V1 Term Histogram',
)
plt.show()


V2 int_rate (연속형 자료형)


In [31]:
# V2: histogram of the 'int_rate' column with matplotlib's default binning.
x = lc_data.loc[:, 'int_rate']
n, bins, patches = plt.hist(x, color='g')
# Label the axes and the figure, then render it.
plt.title('V2 int_rate Histogram (Interest rate on loan)')
plt.ylabel('counts')
plt.xlabel('interest rate %')
plt.show()


V3 installment (연속형 자료형)


In [33]:
# V3: histogram of the 'installment' column (default bins and colour).
x = lc_data.loc[:, 'installment']
n, bins, patches = plt.hist(x)
plt.gca().set(
    xlabel='installment',
    ylabel='counts',
    title='V3 installment Histogram',
)
plt.show()


V9 annual_inc (연속형 자료형)


In [47]:
# V9: histogram of the 'annual_inc' column.
x = lc_data.loc[:, 'annual_inc']
# Only values inside [0, 100000] are binned; anything outside that range
# is left out of the plot by plt.hist.
n, bins, patches = plt.hist(
    x,
    range=(0, 100000),
    color='r',
)
plt.title('V9 annual_inc Histogram')
plt.ylabel('counts')
plt.xlabel('annual income for users')
plt.show()


V12 loan_amnt (연속형 자료형)


In [49]:
# V12: histogram of the 'loan_amnt' column, drawn in green.
x = lc_data.loc[:, 'loan_amnt']
n, bins, patches = plt.hist(x, color='g')
plt.gca().set(
    xlabel='loan amount',
    ylabel='counts',
    title='V12 loan_amnt Histogram',
)
plt.show()


V13 desc (연속형 자료형)


In [75]:
# V13: density plot of the 'desc' column.
# Presumably 'desc' already holds a text length (see the x label) -- confirm
# against the cleaning step.
x = lc_data.loc[:, 'desc']
# NOTE(review): sns.distplot is deprecated in newer seaborn releases; kept
# here to match the library versions this notebook was written against.
sns.distplot(a=x)
plt.title('V13 desc Histogram')
plt.ylabel('density')
plt.xlabel('length of letter on doc')
plt.show()


V15 dti (연속형 자료형)


In [66]:
# V15: histogram of the 'dti' column, drawn in red.
x = lc_data.loc[:, 'dti']
n, bins, patches = plt.hist(x, color='r')
plt.gca().set(
    xlabel='A ratio total monthly debt payments on the total debt obligations',
    ylabel='counts',
    title='V15 dti Histogram',
)
plt.show()


V16 delinq_2yrs (연속형 자료형)


In [67]:
# V16: histogram of the 'delinq_2yrs' column, drawn in green.
x = lc_data['delinq_2yrs']
# plt.hist returns the per-bin counts, the bin edges and the bar artists.
n, bins, patches = plt.hist(x, color='g')
# Axis label fixed: the original ran words together ("ofincidences",
# "borrowerin"), which showed up verbatim on the rendered figure.
plt.xlabel('The number of incidences of delinquency in the borrower in 2yrs')
plt.ylabel('counts')
plt.title('V16 delinq_2yrs Histogram')
plt.show()


V17 inq_last_6mths (연속형 자료형)


In [68]:
# V17: histogram of the 'inq_last_6mths' column.
x = lc_data['inq_last_6mths']
# plt.hist returns the per-bin counts, the bin edges and the bar artists.
n, bins, patches = plt.hist(x)
plt.xlabel('The number of inquiries in past 6 months')
plt.ylabel('counts')
# Title fixed: it was copy-pasted from the V16 delinq_2yrs cell and
# mislabelled this figure.
plt.title('V17 inq_last_6mths Histogram')
plt.show()


V18 open_acc (연속형 자료형)


In [69]:
# V18: histogram of the 'open_acc' column, drawn in red.
x = lc_data.loc[:, 'open_acc']
n, bins, patches = plt.hist(x, color='r')
plt.gca().set(
    xlabel="The number of open credit lines in the borrower's credit file",
    ylabel='counts',
    title='V18 open_acc Histogram',
)
plt.show()


V19 pub_rec (연속형 자료형)


In [74]:
# V19: density plot of the 'pub_rec' column, drawn in green.
x = lc_data.loc[:, 'pub_rec']
# NOTE(review): sns.distplot is deprecated in newer seaborn releases; kept
# here to match the library versions this notebook was written against.
sns.distplot(a=x, color='g')
plt.title('V19 pub_rec Histogram')
plt.ylabel('density')
plt.xlabel('Number of derogatory public records')
plt.show()


V20 revol_bal (연속형 자료형)


In [73]:
# V20: density plot of the 'revol_bal' column.
x = lc_data.loc[:, 'revol_bal']
# NOTE(review): sns.distplot is deprecated in newer seaborn releases; kept
# here to match the library versions this notebook was written against.
sns.distplot(a=x)
plt.gca().set(
    xlabel='Total credit revolving balance',
    ylabel='density',
    title='V20 revol_bal Histogram',
)
plt.show()


V21 revol_util (연속형 자료형)


In [80]:
# V21: density plot of the 'revol_util' column.
x = lc_data.loc[:, 'revol_util']
# NOTE(review): sns.distplot is deprecated in newer seaborn releases; kept
# here to match the library versions this notebook was written against.
sns.distplot(a=x)
plt.title('V21 revol_util Histogram')
plt.ylabel('density')
# NOTE(review): this label reads as if it was cut off after "relative" --
# confirm the intended wording.
plt.xlabel('the amount of credit the borrower is using relative')
plt.show()


V22 total_acc (연속형 자료형)


In [78]:
# V22: histogram of the 'total_acc' column (default bins and colour).
x = lc_data['total_acc']
plt.hist(x)
plt.xlabel('The total number of credit lines currently in the borrower\'s credit file')
plt.ylabel('counts')
# Title fixed: a stray markdown heading marker ('## ') had leaked into the
# string and was rendered literally on the figure.
plt.title('V22 total_acc Histogram')
plt.show()


V24 acc_now_delinq (연속형 자료형)


In [81]:
# V24: histogram of the 'acc_now_delinq' column (default bins and colour).
x = lc_data.loc[:, 'acc_now_delinq']
plt.hist(x)
plt.gca().set(
    xlabel='The number of accounts on which the borrower is now delinquent.',
    ylabel='counts',
    title='V24 acc_now_delinq Histogram',
)
plt.show()